#################################################################################
# Replication file for:                                                         #
# "Balancing Precision and Retention in Experimental Design"                    #
#                                                                               #
# Gustavo Diaz                                                                  #
# Northwestern University                                                       #
# gustavo.diaz@northwestern.edu                                                 #
#                                                                               #
# Erin L. Rossiter                                                              #
# University of Notre Dame                                                      #
# erossite@nd.edu                                                               #
#                                                                               #
# This file produces Appendix Figure G10.                                       #                                          
#################################################################################

# Setup -----

# ggplot global options
theme_set(theme_bw(base_size = 20))

# Simulation -----

# Declare block randomized experiment with correlated sample loss
# Compare with uncorrelated sample loss
# Make blocking more consequential than in main paper
# In this case tau is a vector of size two, one effect per block
block_designer <- function(N, tau, p){
  # Population
  pop <- declare_population(
    N = N,
    x = rnorm(N),
    b = ifelse(x > 0, 1, 0),
    tau = ifelse(b == 1, tau, 0.4 - tau),
    drop = ifelse(b == 1, 
                  sample(c(0,1),
                         N, 
                         replace = T, 
                         prob = c((1-p),p)),
                  0),
    # Reverse so that report = 1
    R = ifelse(drop == 1, 0, 1)
    )
  
  # Potential outcomes
  pot_c <- declare_potential_outcomes(Y ~ tau * Z_c + x,
                                      assignment_variables= "Z_c")
  
  pot_b <- declare_potential_outcomes(Y ~ tau * Z_b + x,
                                      assignment_variables= "Z_b")
  
  # Estimands
  estimand_c <- declare_inquiry(ATE_c = mean(Y_Z_c_1 - Y_Z_c_0))
  
  estimand_b <- declare_inquiry(ATE_b = mean(Y_Z_b_1 - Y_Z_b_0))
  
  # Assignments
  assign_c <- declare_assignment(Z_c = complete_ra(N = N))
  
  assign_b <-declare_assignment(Z_b = block_ra(blocks = b))
  
  # Reveal outcomes
  reveal_c <- declare_reveal(outcome_variables = Y,
                             assignment_variables = Z_c)
  
  edit_c <- declare_step(Y_c = Y, handler = fabricate)
  
  reveal_b <- declare_reveal(outcome_variables = Y,
                             assignment_variables = Z_b)
  
  edit_b <- declare_step(Y_b = Y, handler = fabricate)
  
  # Estimators
  post_c <- declare_estimator(Y_c ~ Z_c, .method = lm_robust,
                              inquiry = "ATE_c",
                              label = "complete_ra")
  
  post_b <- declare_estimator(Y_b ~ Z_b, .method = lm_robust,
                              inquiry = "ATE_b", 
                              subset = R == 1,
                              fixed_effects = ~b,
                              label = "block_ra")
  
  # Return design
  pop + 
    pot_c + pot_b +
    estimand_c + estimand_b +
    assign_c + assign_b +
    reveal_c + edit_c +
    reveal_b + edit_b +
    post_c + post_b


}

# Expand design
designs <- expand_design(
  designer = block_designer,
  N = 1000, # sample size
  tau = c(0.2, 0.3), # Linear additive effect
  p = seq(0, 0.8, by = 0.1), # sample loss rate
  expand = TRUE
)

# Simulate
set.seed(20230424)
sim_nonrandom = simulate_design(designs, sims = 1000)


# Visualize -----

# Diagnose
diag_nr = sim_nonrandom %>% 
  group_by(p, estimator, tau) %>% 
  summarize(
    powerp = mean(p.value <= .05, na.rm = TRUE),
    powerci = mean(0 > conf.high | 0 < conf.low, na.rm = TRUE),
    meanp = mean(p.value, na.rm = TRUE),
    coverage = mean(estimand <= conf.high & estimand >= conf.low, na.rm = TRUE),
    mean_estimate = mean(estimate, na.rm = TRUE),
    bias = mean(estimate - estimand, na.rm = TRUE),
    mse = mean((estimate - estimand)^2, na.rm = TRUE),
    rmse = sqrt(mean((estimate - estimand)^2, na.rm = TRUE)), 
    mad = mean(abs(estimate - estimand), na.rm = TRUE),
    sd_estimate = sd(estimate, na.rm = TRUE),
    mean_se = mean(std.error, na.rm = TRUE),
    mean_estimand = mean(estimand, na.rm = TRUE),
    nestimand = length(unique(estimand)),
    nsims = n()
  ) %>% 
  mutate(estimator = recode(estimator,
                            "block_ra" = "Block",
                            "complete_ra" = "Complete"),
         tau = recode(tau,
                      `0.3` = "(0.3, 0.1)",
                      `0.2` = "(0.2, 0.2)"))

# recode order of estimators
diag_nr$estimator = fct_relevel(diag_nr$estimator,
                                "Block",
                                "Complete")

diag_nr$power_label = "Power"

diag_nr$bias_label = "Bias"

# Plot
power_nr = ggplot(diag_nr) +
  aes(x = p, y = powerp, shape = as.factor(tau), color = estimator) +
  geom_point(size = 3) + geom_line(size = 1) +
  theme(legend.position = "bottom") +
  labs(x = "Sample loss rate",
       y = expression(paste('Power at ', alpha  == 0.05)),
       color = "Randomization",
       shape = "True effects") +
  scale_color_grey(start = 0, end = 0.5) +
  facet_wrap(~ power_label) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.2), limits = c(0, 1))

bias_nr = ggplot(diag_nr) +
  aes(x = p, y = abs(bias), shape = as.factor(tau), color = estimator) +
  geom_point(size = 3) + geom_line(size = 1) +
  theme(legend.position = "bottom") +
  labs(x = "Sample loss rate",
       y = "Aboslute bias: |Mean(estimate - estimand)|",
       color = "Randomization",
       shape = "True effects") +
  scale_color_grey(start = 0, end = 0.5) +
  facet_wrap(~ bias_label)


power_bias = ggarrange(power_nr, bias_nr, ncol = 2, nrow = 1, 
                       common.legend = TRUE, legend = "bottom")

power_bias

#save
ggsave(plot = power_bias, filename = "figures/figG10.pdf",
       height = 6, width = 12)